/************************************************************************
Purpose: Preparing data sets for ETS structural model	
*************************************************************************/

* Start from an empty session (data, programs, stored matrices)
clear all
clear matrix

* Session settings: wide output lines, no -more- prompts, -pause- enabled
set linesize 255
set more off
pause on

* Master data: must be unique on gpcb_id for the 1:m merge that follows
use "$PHONE_DATA_OUT/apcd_panel_plant.dta", clear

** Merge CEMS emissions data used for emissions analysis
* One plant row in memory fans out to its plant-period emission rows;
* -nogenerate- suppresses _merge, so only treatmentstatus needs dropping.
merge 1:m gpcb_id using "$EMISSIONS_DATA_OUT/pm_mass_plant-period.dta", nogenerate
* 292 plants x 10 periods (included in analysis) + 26 plants (not in analysis)
* = 2,946 observations
drop treatmentstatus

* Put the period index right after the plant name and sort as a plant-period panel
order period_num, after(gpcb_name)
sort gpcb_id period_num

** Merge periods data
* The period index file is keyed on "period", so rename the key before merging
rename period_num period
merge m:1 period using "$TRADING_DATA_OUT/index_period.dta", nogenerate
order period, after(apcd_unitcost_ope_esp)

** Prorate emissions based on compliance period length
* For each rule (0, A, B): scale validated mass emissions to a 30-day month,
* then discard the unscaled variable.
local rule_num 0 A B
foreach rule of local rule_num {
	gen pm_mass_val_`rule'_prorated = (pm_mass_val_`rule' / compliance_length) * 30
	label var pm_mass_val_`rule'_prorated "Plant-period Validated Mass Emissions Prorated (kg / month) - Rule `rule'"
	drop pm_mass_val_`rule'
}

** Merge with potential maximum emissions
* Only the 12-hour and 16-hour uncontrolled-mass variables are pulled in, matched on gpcb_id
merge m:1 gpcb_id using "$EMISSIONS_DATA_OUT/potential_max_emissions.dta", ///
	keepusing(AverageUncontrolledMass12 AverageUncontrolledMass16) nogenerate
rename (AverageUncontrolledMass12 AverageUncontrolledMass16) ///
	(pm_mass_potential_max_12 pm_mass_potential_max_16)
* Check with Prajval if these are already prorated?
foreach hrs in 12 16 {
	label var pm_mass_potential_max_`hrs' "Plant-period Potential Maximum Mass Emissions (kg / month) - `hrs' hours"
}

** Merge with trading data
* The trading panel is keyed on period_num, so restore the original key name first
rename period period_num
merge 1:1 gpcb_id period_num using "$TRADING_DATA_CLEAN/panel_plant-period.dta"

* Build the trade-data indicator from _merge
*   _merge == 1 : row only in the data in memory  -> D_trade = 0
*   _merge == 2 : row only in the trade data       -> D_trade = 1
*   _merge == 3 : row matched in both              -> D_trade = 1
* Using-only rows (_merge == 2) are coded 1 explicitly; recoding only 1 -> 0 and
* 3 -> 1 would leave them at 2, outside the 0/1 value label defined below.
gen byte D_trade = inlist(_merge, 2, 3)
drop _merge
label define D_trade 0 "Not in trade data" 1 "Present in trade data"
label values D_trade D_trade
label var D_trade "=1 if present in trade data"
order D_trade, after(D_analysis)

* Drop period/cap variables not carried forward; the monthly compliance cap
* takes over the period_cap_prorated name
drop period_end compliance_end period_cap_prorated compliance_cap
rename compliance_month_cap period_cap_prorated

** Add apcd_max variable
* Codes: 4 = ESP present; 3 = scrubber present, no ESP; 2 = bagfilter present,
* no scrubber/ESP; 1 = cyclone present, none of the other three; 0 otherwise.
* The four cases are mutually exclusive, so the weighted sum of their
* indicators is exactly the single applicable code. A missing flag fails
* both the == 1 and == 0 tests, so it never satisfies any case.
gen apcd_max = 4 * (apcd_present_esp == 1) ///
	+ 3 * ((apcd_present_scrubber == 1) & (apcd_present_esp == 0)) ///
	+ 2 * ((apcd_present_bagfilter == 1) & (apcd_present_scrubber == 0) & (apcd_present_esp == 0)) ///
	+ 1 * ((apcd_present_cyclone == 1) & (apcd_present_bagfilter == 0) & (apcd_present_scrubber == 0) & (apcd_present_esp == 0))

** Data notes for Nick, and to-do items.
* APCD maintenance and operation costs = 6% and 3%, respectively, of installation cost; these values may be updated.
* EDC price = fixed at Rs. 200 based on the manual; the actual price is not available.
* EDC amount = estimated rather than actual, and also prorated.
* permit_trade_prorated = inv + cons - alloc // in theory this should equal purchased - sold, but it does not.
* We use Rs. because it makes more sense for permits right now.

** Create a new index for plants from 1 to 318
* Build a lookup table with one row per gpcb_id, numbered consecutively in
* ascending gpcb_id order, then merge the running index back onto the panel.
preserve
keep gpcb_id
duplicates drop
sort gpcb_id
gen id_plant = _n
rename gpcb_id id_gpcb
tempfile gpcb_ids
save `gpcb_ids'
restore

* The panel key is renamed to match the lookup table before merging
rename gpcb_id id_gpcb
merge m:1 id_gpcb using `gpcb_ids', nogenerate
label var id_plant "Unique Plant ID (1-318)"
order id_plant id_gpcb

** Export for MATLAB
* 2,946 observations, 58 variables
* Sort by descending D_treatment and D_analysis, then by plant and period
gsort -D_treatment -D_analysis id_gpcb period_num

* Write the same data twice: Stata format, and tab-delimited text with raw
* numeric codes (no value labels)
local out_stem "$MODEL_DATA_OUT/structural_model_input"
save "`out_stem'.dta", replace
export delimited using "`out_stem'.txt", delimiter(tab) nolabel replace
